/* Clear workspace: start from an empty session.
   Note that clear all does not drop global macros, so the globals logs and
   output used for file paths below must already be defined by the caller. */
clear all

************************************************************************************
* DESCRIPTION: This file applies propensity score matching (PSM) techniques
*              to the dataset. The package `psmatch2` is required.
*				FinTech vs bank borrowers
***********************************************************************************

*************************************************************
* Matching based on [t-3, t-1] firm characteristics
* Matching done within industry and size cells
*************************************************************

/* Open a fresh log, closing any log left open by an earlier run */
capture log close
log using "$logs\propensity_score_matching.log", replace

/* Read the input dataset into memory */
use "$output\ifp_scrapped_aggregated_industries_modified.dta", clear

/* Keep months strictly after December 2013 and strictly before January 2020 */
keep if mdate > ym(2013, 12) & mdate < ym(2020, 1)

/* leasing is bm + bi; loan adds stloan, ltloan and oc on top of those two.
   Either sum is missing whenever one of its components is missing. */
generate leasing = bm + bi
generate loan = stloan + ltloan + bm + bi + oc

/* Work with log(1 + loan) from here on */
replace loan = ln(1 + loan)

/*************************************************************
* Missing-value flags for key variables
* For every variable in the list, a companion m_<name> is set
* to 1 where the variable equals system missing (.) and 0
* otherwise; the variable itself is then zero-filled there.
*************************************************************/

local fillvars outsideloan r3filled r5filled r13filled r_collateral_filled logassets_filled logemployees

foreach v of varlist `fillvars' {
    generate m_`v' = (`v' == .)
    replace `v' = 0 if m_`v' == 1
}

/*************************************************************
* Firm-level characteristics held fixed for the matching
*************************************************************/

/* (a) codummy: equals 1 when co summed over the rows with delta < 0 is
       positive, 0 otherwise. One row per siren is written to disk. */
preserve
    keep if delta < 0
    collapse (sum) co, by(siren)
    generate codummy = (co > 0)
    keep siren codummy
    save "$output\psm_co_dummy.dta", replace
restore

/* (b) Snapshot of each firm at delta == -11, one row per siren, restricted
       to firms that also appear in the codummy file built in (a). */
preserve
    keep if delta == -11
    keep siren outloanyear size rating industry ageatevent ///
         r3filled m_r3filled r5filled m_r5filled r13filled m_r13filled ///
         r_collateral_filled m_r_collateral_filled outsideloan ///
         logassets_filled m_logassets_filled logemployees m_logemployees
    duplicates drop siren, force
    /* keep(match) nogenerate retains matched rows only and creates no _merge */
    merge 1:1 siren using "$output\psm_co_dummy.dta", keep(match) nogenerate
    save "$output\psm_stable_keep.dta", replace
restore

/*************************************************************
* Reshape dataset for matching
*
* Input : long panel, one row per siren and delta.
* Output: one row per siren with loan0 ... loan6, where the
*         suffix is delta2 = delta + 6 (so loan6 is delta == 0),
*         merged with the firm-level file psm_stable_keep.dta.
*
* The psmatch2 calls further down use loan0-loan5 as covariates
* and loan6 as the outcome, so the data must be reshaped to wide
* form here; collapsing to a single row per siren without the
* reshape would leave those variables undefined.
*************************************************************/

drop datemonth

/* Keep the seven periods delta = -6, ..., 0 and re-index them as 0, ..., 6 */
keep if inrange(delta, -6, 0)
gen delta2 = delta + 6
drop delta

/* reshape needs siren x delta2 to identify rows uniquely */
duplicates drop siren delta2, force

/* Define matching variables (globals are reused by the matching sections) */
global xlist "outsideloan i.outloanyear i.codummy ageatevent i.rating r3filled i.m_r3filled r13filled i.m_r13filled r_collateral_filled i.m_r_collateral_filled"
global ylist "loan"
global celllist "industry size"

keep siren p2p delta2 $celllist $ylist

/* reshape wide requires every non-reshaped variable to be constant within
   siren. Fix the treatment flag and the cell variables at their value in the
   earliest retained period so that a firm whose industry or size changes
   inside the window does not abort the reshape. */
foreach v of varlist p2p $celllist {
    bysort siren (delta2): replace `v' = `v'[1]
}

/* Long -> wide: creates loan0 ... loan6, one row per siren */
reshape wide $ylist, i(siren) j(delta2)

/* Attach the firm-level characteristics; keep firms present in both files.
   Variables that exist in both datasets keep their master (in-memory) values. */
merge 1:m siren using "$output\psm_stable_keep.dta"
keep if _merge == 3
drop _merge

/*************************************************************
* 1. Propensity Score Matching 1:1 (Nearest Neighbor)
*
* Within each cell defined by the celllist global, psmatch2 is
* run with treatment p2p, covariates loan0-loan5 plus xlist,
* outcome loan6, matching without replacement and with common
* support imposed. Results are copied from the psmatch2 output
* variables (_pscore, _support, ...) into permanent variables,
* and the matched firms are merged back onto the full panel.
*************************************************************/

preserve
    /* Remove leftovers from earlier psmatch2 runs and old graphs */
    capture drop _*
    capture graph drop _all

    /* Containers that accumulate the per-cell psmatch2 results */
    foreach name in pscore support weight id n1 nn {
        gen `name' = .
    }

    /* Create matching cells */
    egen cell = group($celllist)
    quietly levelsof cell, local(gr)
    foreach i of local gr {
        capture noisily psmatch2 p2p loan0 loan1 loan2 loan3 loan4 loan5 $xlist ///
            if cell == `i', out(loan6) noreplacement common
        /* Store the return code right away so the failure branch can report it */
        local rc = _rc

        if `rc' == 0 {
            display "This is cell `i'"
            foreach var of varlist pscore support weight id n1 nn {
                replace `var' = _`var' if cell == `i'
            }
            capture psgraph, name(cell`i',replace) bin(30)
            /* noisily: the balance table is the whole point of pstest, so let
               it reach the log; capture still keeps a failure from stopping
               the loop */
            capture noisily pstest loan0 loan1 loan2 loan3 loan4 loan5 $xlist
        }
        else {
            /* Report the actual return code: failures are not necessarily due
               to small cells (e.g. a missing variable gives r(111)) */
            display as error "psmatch2 failed for cell == `i' (return code `rc'): skipping to next value of i"
        }
    }

    /* Save matched sample: firms that received a matching weight.
       NOTE(review): id and n1 come from separate psmatch2 runs, one per cell;
       confirm they are unique across cells before using them to link pairs. */
    keep siren pscore support weight id n1 nn
    keep if weight ~= .
    merge 1:m siren using "$output\ifp_scrapped_aggregated_industries_modified.dta"
    keep if _merge == 3
    drop _merge
    save "$output\matched_sample_ps_norep_industry_year.dta", replace
restore

/*************************************************************
* 2. Propensity Score Matching 1:n
*
* Same cell-by-cell procedure as the 1:1 section, but psmatch2
* is called without the noreplacement option, so a control firm
* may be matched to several treated firms. Treatment is p2p,
* covariates are loan0-loan5 plus xlist, outcome is loan6, and
* common support is imposed.
*************************************************************/

preserve
    /* Remove leftovers from earlier psmatch2 runs and old graphs */
    capture drop _*
    capture graph drop _all

    /* Containers that accumulate the per-cell psmatch2 results */
    foreach name in pscore support weight id n1 nn {
        gen `name' = .
    }

    /* Create matching cells */
    egen cell = group($celllist)
    quietly levelsof cell, local(gr)
    foreach i of local gr {
        /* psmatch2 output is suppressed here; on failure only the return
           code is reported below */
        capture psmatch2 p2p loan0 loan1 loan2 loan3 loan4 loan5 $xlist ///
            if cell == `i', out(loan6) common
        local rc = _rc

        if `rc' == 0 {
            display "This is cell `i'"
            foreach var of varlist pscore support weight id n1 nn {
                replace `var' = _`var' if cell == `i'
            }
            capture psgraph, name(cell`i',replace) bin(30)
            /* noisily: let the balance table reach the log; capture still
               keeps a pstest failure from stopping the loop */
            capture noisily pstest loan0 loan1 loan2 loan3 loan4 loan5 $xlist
        }
        else {
            /* Report the actual return code: failures are not necessarily due
               to small cells (e.g. a missing variable gives r(111)) */
            display as error "psmatch2 failed for cell == `i' (return code `rc'): skipping to next value of i"
        }
    }

    /* Save matched sample: firms that received a matching weight.
       NOTE(review): id and n1 come from separate psmatch2 runs, one per cell;
       confirm they are unique across cells before using them to link pairs. */
    keep siren pscore support weight id n1 nn
    keep if weight ~= .
    merge 1:m siren using "$output\ifp_scrapped_aggregated_industries_modified.dta"
    keep if _merge == 3
    drop _merge
    save "$output\matched_sample_ps_rep_industry_year.dta", replace
restore

/*************************************************************
* 3. k-Nearest Neighbor Matching
*
* Cell-by-cell psmatch2 with neighbor(5): each treated firm is
* matched to its five nearest neighbours on the propensity
* score, with common support imposed. Treatment is p2p,
* covariates are loan0-loan5 plus xlist, outcome is loan6.
* The neighbour identifiers _n1 ... _n5 are stored in n1 ... n5.
*************************************************************/

preserve
    /* Remove leftovers from earlier psmatch2 runs */
    capture drop _*

    /* Containers that accumulate the per-cell psmatch2 results.
       n1 ... n5 are created consecutively so the range n1-n5 below
       refers to exactly these five variables. */
    foreach name in pscore support weight id n1 n2 n3 n4 n5 nn {
        gen `name' = .
    }

    /* Create matching cells */
    egen cell = group($celllist)
    quietly levelsof cell, local(gr)
    foreach i of local gr {
        /* psmatch2 output is suppressed here; on failure only the return
           code is reported below */
        capture psmatch2 p2p loan0 loan1 loan2 loan3 loan4 loan5 $xlist ///
            if cell == `i', out(loan6) neighbor(5) common
        local rc = _rc

        if `rc' == 0 {
            display "This is cell `i'"
            foreach var of varlist pscore support weight id n1-n5 nn {
                replace `var' = _`var' if cell == `i'
            }
        }
        else {
            /* Report the actual return code: failures are not necessarily due
               to small cells (e.g. a missing variable gives r(111)) */
            display as error "psmatch2 failed for cell == `i' (return code `rc'): skipping to next value of i"
        }
    }

    /* Save matched sample: firms that received a matching weight.
       NOTE(review): id and n1-n5 come from separate psmatch2 runs, one per
       cell; confirm they are unique across cells before linking matches. */
    keep siren pscore support weight id n1-n5 nn
    keep if weight ~= .
    merge 1:m siren using "$output\ifp_scrapped_aggregated_industries_modified.dta"
    keep if _merge == 3
    drop _merge
    save "$output\matched_sample_nn.dta", replace
restore

/* Close log file */
log close
